import json

# Input/output locations. 'YOUR_PATH' is a placeholder to be replaced with the
# real directory before running.
GENERATED_CAPTIONS_PATH = 'YOUR_PATH/generated_captions.jsonl'
VAL_PATH = 'YOUR_PATH/Story-result/val.jsonl'
OUTPUT_PATH = 'YOUR_PATH/combined_output.jsonl'

# Number of consecutive caption records consumed for each val.jsonl record.
CAPTIONS_PER_ITEM = 10


def parse_jsonl(lines):
    """Decode an iterable of JSON Lines strings into a list of objects.

    Blank or whitespace-only lines (e.g. a trailing empty line at the end of
    the file) are skipped instead of raising ``json.JSONDecodeError``.

    Args:
        lines: Iterable of strings, one JSON document per string.

    Returns:
        A list with one decoded object per non-blank line, in input order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    return [json.loads(line) for line in lines if line.strip()]


def load_jsonl(path):
    """Read the JSON Lines file at *path* and return its decoded records.

    The file is opened as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return parse_jsonl(file)


def combine_images_with_captions(val_data, generated_captions_data,
                                 captions_per_item=CAPTIONS_PER_ITEM):
    """Pair each val record's images with a consecutive run of captions.

    The i-th record of *val_data* receives the captions taken from
    ``generated_captions_data[i * captions_per_item:(i + 1) * captions_per_item]``.
    From each of those caption records only the first entry of its
    ``'generated_captions'`` list is used.

    If the caption records run out, the affected items get fewer captions
    (possibly none); no error is raised here.

    Args:
        val_data: Sequence of dicts, each with an ``'images'`` key. The value
            is copied into the output as-is.
        generated_captions_data: Sequence of dicts, each with a non-empty
            ``'generated_captions'`` list.
        captions_per_item: How many caption records to consume per val record.

    Returns:
        A list of ``{'images': ..., 'captions': [...]}`` dicts, one per record
        in *val_data*, in the same order.

    Raises:
        KeyError: If a record lacks the expected key.
        IndexError: If a used caption record has an empty caption list.
    """
    combined = []
    for item_index, item in enumerate(val_data):
        start = item_index * captions_per_item
        # Slicing clamps at the end of the list, so a shortfall simply yields
        # a shorter (or empty) chunk rather than an IndexError.
        chunk = generated_captions_data[start:start + captions_per_item]
        combined.append({
            'images': item['images'],
            'captions': [record['generated_captions'][0] for record in chunk],
        })
    return combined


def main():
    """Load both input files, combine them, and write the output JSONL file."""
    import sys  # Local import: only needed for the shortfall warning below.

    generated_captions_data = load_jsonl(GENERATED_CAPTIONS_PATH)
    val_data = load_jsonl(VAL_PATH)

    combined_data = combine_images_with_captions(val_data, generated_captions_data)

    # Surface a caption shortfall instead of silently writing short lists.
    needed = len(val_data) * CAPTIONS_PER_ITEM
    available = len(generated_captions_data)
    if available < needed:
        print(
            f"Warning: expected {needed} caption records for {len(val_data)} "
            f"items but found {available}; some items have fewer than "
            f"{CAPTIONS_PER_ITEM} captions.",
            file=sys.stderr,
        )

    # Save the combined data to a new JSONL file, one JSON object per line.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as output_file:
        output_file.writelines(json.dumps(entry) + '\n' for entry in combined_data)

    print(f"Combined data saved to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
